{
 "nbformat": 4,
 "nbformat_minor": 2,
 "metadata": {
  "language_info": {
   "name": "python",
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "version": "3.8.1-final"
  },
  "orig_nbformat": 2,
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "npconvert_exporter": "python",
  "pygments_lexer": "ipython3",
  "version": 3,
  "kernelspec": {
   "name": "python38164bit94595e38455f4cc1b10edf32f50056fb",
   "display_name": "Python 3.8.1 64-bit"
  }
 },
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import numpy as np\n",
    "df_rating = pd.read_csv('rating.csv', usecols=[0,1,2])\n",
    "df_tag = pd.read_csv('tag.csv', usecols=[0,1,2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "{'creative',\n 'Own It',\n 'SUPERNATURAL ROMANCE',\n 'Sarah Polley',\n 'meryl streep',\n '1',\n 'Matt Dillon',\n 'flashy',\n 'swedish',\n 'villain nonexistent or not needed for good story',\n 'love story',\n 'original plot',\n 'nuclear',\n 'teacher',\n 'Quotable',\n 'Sidney Poitier',\n 'based on true story',\n 'instant view',\n 'horrible',\n 'conspiracy theory',\n 'cross dressing men',\n 'Grace Kelly',\n 'british',\n 'strange',\n \"so bad it's funny\",\n 'Evan Rachel Wood',\n 'Charles Bronson',\n 'ethics',\n 'nerds',\n 'slow paced',\n 'casino',\n 'fraternity',\n 'New York',\n 'Milla Jovovich',\n 'librarians',\n 'spelling bee',\n '1940s',\n 'Michael Fassbender',\n 'solitude',\n 'afternoon section',\n 'sex',\n 'San Francisco',\n 'unpredictable',\n 'Jeremy Renner',\n 'mockumentary',\n 'friendship',\n 'evolution',\n 'original',\n 'germany',\n 'lesbian subtext',\n 'sexuality',\n 'supernatural',\n 'Ingrid Bergman',\n 'fairy tale',\n 'interesting characters',\n 'Monica Bellucci',\n 'adapted from:comic',\n 'fairy tales',\n 'stoner movie',\n 'Seen 2009',\n 'complicated',\n 'performances',\n 'ugly',\n 'Jack Black',\n 'Oscar (Best Foreign Language Film)',\n 'beautiful cinematography',\n 'Christopher Lee',\n 'Nick Hornby',\n 'childish',\n 'Stephen Chow',\n 'kids',\n 'dramatic',\n 'Daniel Day-Lewis',\n 'rated-R',\n 'Matthew Broderick',\n 'Glenn Close',\n 'ambition',\n 'Antarctica',\n 'rate down',\n 'Claire Danes',\n 'spielberg',\n 'yakuza',\n 'Ben Affleck',\n 'beach',\n 'Alan Rickman',\n 'Kung Fu',\n 'nudity',\n 'witches',\n 'not scary',\n 'Richard Matheson',\n 'controversial',\n 'writers',\n 'Jared Leto',\n 'boarding school',\n 'black and white',\n 'feel-good',\n 'earnest',\n 'sex scene',\n 'product placement',\n 'thrilling',\n 'Holocaust',\n \"Tumey's To See Again\",\n 'wrongly accused',\n 'storyline',\n '1990s',\n 'storytelling',\n 'happy ending',\n 'physics',\n 'Kristen Wiig',\n 'homosexuality',\n 'pacing',\n 'Tom Hanks',\n 'time-lapse',\n 'Sigourney Weaver',\n 'Drew Barrymore',\n 'SIBLING RELATIONSHIPS',\n 'Mila Kunis',\n 'explosions',\n 'jazz',\n 'great soundtrack',\n 'Matthew McConaughey',\n \"memasa's movies\",\n 'powerful ending',\n 'amazing photography',\n 'Michael Haneke',\n 'PG13',\n 'over the top',\n 'Jean-Luc Godard',\n 'underrated',\n 'Creature Feature',\n 'Hayao Miyazaki',\n 'Chan-wook Park',\n 'François Truffaut',\n 'awesome',\n 'violent',\n 'relativity',\n 'poor dialogue',\n 'Love story',\n 'Joseph L. Mankiewicz',\n 'need to buy',\n 'imagination',\n 'Shirley Temple',\n 'vengeance',\n 'Winona Ryder',\n 'Frightening',\n 'great dialogue',\n 'Ian McKellen',\n 'Ryan Gosling',\n 'Documentary',\n 'spying',\n 'simplistic',\n 'sick',\n 'child abuse',\n '2014',\n 'Zac Efron',\n 'tense',\n 'USA film registry',\n 'Christian',\n 'My movies',\n 'score',\n 'Howard Hawks',\n 'political',\n 'sport:boxing',\n 'confusing',\n 'mother-daughter relationships',\n '11/10',\n 'Tom Waits',\n 'toys',\n 'university',\n 'justice',\n 'Malcolm McDowell',\n 'voyeurism',\n 'morgan freeman',\n 'surrealism',\n 'trippy',\n 'nude black women',\n 'Leonard Nimoy',\n 'Mike Nichols',\n 'Satoshi Kon',\n 'cathartic',\n 'soccer',\n 'drug trade',\n 'Jennifer Aniston',\n 'Dick Van Dyke',\n 'Italy',\n 'antihero',\n 'NO_FA_GANES',\n 'submarine',\n 'Direction',\n 'matter-of-fact',\n 'HEROIC MISSION',\n 'Disney animated feature',\n 'teens',\n 'shipwreck',\n 'Scotland',\n 'less than 300 ratings',\n 'Aardman',\n 'humour',\n 'John Carpenter',\n 'Olympics',\n 'youtube',\n '\\\\\\\\\"found footage\\\\\"\\\\\"\"',\n 'highly quotable',\n 'Michelle Monaghan',\n 'cops',\n 'father-son relationship',\n 'beautifully filmed',\n 'Astaire and Rogers',\n 'lone hero',\n 'millenial foreign comedies to see',\n 'journalism',\n 'DC Comics',\n 'Jet Li',\n 'Isaac Asimov',\n 'adapted from:TV series',\n 'visually stunning',\n 'feminist',\n 'Ben Kingsley',\n 'Rob Schneider',\n 'good soundtrack',\n 'Clarence Brown',\n 'need to own',\n 'David Cronenberg',\n 'Ray Liotta',\n 'MTSKAF',\n 'UNREQUITED LOVE',\n 'sport:American football',\n 'Do kupienia',\n 'Family',\n 'obsessive compulsive disorder',\n 'scenery',\n 'Tina Fey',\n 'dreamlike',\n 'Bruce Campbell',\n 'monkey',\n 'vikings',\n 'revolution',\n '1980s',\n 'goretastic',\n 'goth',\n 'Seen 2010',\n 'mental hospital',\n 'BFI modern classic',\n 'Jason Bateman',\n 'social commentary',\n 'High School',\n 'parallel universe',\n 'lengthy',\n 'editing',\n 'plot holes',\n 'terrorism',\n 'tongue-in-cheek',\n 'nationalism',\n 'perfect',\n 'quirky',\n 'intense',\n 'Sean Connery',\n 'Monty Python',\n 'Iraq War',\n 'Al Pacino',\n 'drab',\n 'weak story',\n 'secrets',\n 'REDBOX',\n 'wolves',\n 'jungle',\n 'preachy',\n 'comic books',\n 'Ending',\n 'sexual abuse',\n 'England',\n 'Bill Murray',\n 'War',\n 'screwball',\n 'sports',\n 'melancholy',\n 'immortality',\n 'dinosaurs',\n 'Amazing Cinematography',\n 'feminism',\n 'guns',\n 'resistance movement',\n 'In Netflix queue',\n 'Henry Fonda',\n 'Matthew Perry',\n 'madness',\n 'native americans',\n 'symbolism',\n 'runaway',\n 'William A. Wellman',\n 'NR',\n 'Boring',\n 'btaege',\n 'Salma Hayek',\n 'etaege',\n 'sacrifice',\n 'author:Alan Moore',\n 'Christmas',\n 'vietnam war',\n 'PROSTITUTES',\n '2.5',\n 'China',\n 'DVD-RAM',\n 'Jim Jarmusch',\n 'must see',\n 'Sexualized violence',\n 'Veja',\n 'Philip Seymour Hoffman',\n 'Zach Braff',\n 'misogyny',\n 'MOLT_CRITICADA',\n 'Nudity (Topless - Notable)',\n 'psychiatry',\n 'bloggers',\n 'Alfred Hitchcock',\n 'Catherine Keener',\n 'Vietnam War',\n 'Indiana Jones',\n 'body horror',\n 'sister sister relationship',\n 'wintry',\n 'Forest Whitaker',\n 'Civil War',\n 'spaghetti western',\n 'sandra bullock',\n 'cult film',\n 'stop-motion',\n 'chase',\n 'french',\n 'Iran',\n 'hallucination',\n 'Halloween',\n 'no happy ending',\n 'to see: horror',\n 'Do zassania',\n 'Jeff Bridges',\n 'simple plot',\n 'atmosphere',\n 'Angelina Jolie',\n 'Helena Bonham Carter',\n 'author:Stephen King',\n 'Judaism',\n 'life philosophy',\n 'American propaganda',\n 'Good Romantic Comedies',\n 'parenthood',\n '1960s',\n 'death penalty',\n 'fun',\n 'ECCENTRIC FAMILIES',\n 'photographer',\n 'lyrical',\n 'formulaic',\n 'animal:dog',\n 'Ei muista',\n 'Period piece',\n 'Franka Potente',\n 'post-apocalyptic',\n 'arnold',\n 'Magic',\n 'idiotic',\n 'Bond',\n 'boys',\n 'j netflix',\n 'autism',\n 'effects',\n 'Justin Long',\n 'smart comedy',\n 'Kevin Bacon',\n 'dumb',\n 'ecology',\n 'natural disaster',\n 'setting:LA',\n 'animation',\n 'John Waters',\n 'Chicago',\n 'sci fi',\n 'Tom Hardy',\n 'history',\n 'mental illness',\n 'capitalism',\n 'guilty pleasure',\n 'identity crisis',\n 'Suicide',\n 'Comedy Need to See',\n 'Wolfgang Petersen',\n 'DVD Collection',\n 'heroism',\n 'foreign language',\n 'space',\n 'Timothy Olyphant',\n 'Berlin',\n 'Gwyneth Paltrow',\n 'Marion Cotillard',\n 'Catherine Zeta-Jones',\n 'alternate timeline',\n 'prostitution',\n 'Robert Downey Jr.',\n 'Horror',\n 'thriller',\n 'future',\n 'giant robots',\n 'painting',\n 'Playlist',\n 'Nudity',\n 'space opera',\n 'everything',\n 'ghost story',\n 'United States',\n 'Michelle Rodriguez',\n 'Peter Sarsgaard',\n 'figure skating',\n 'alone',\n 'satire',\n 'not funny',\n 'stop motion',\n 'india',\n 'gambling',\n 'Iraq',\n 'love',\n 'Cuba Gooding Jr.',\n 'Animation',\n 'Christopher Walken',\n 'dogs',\n 'snow',\n 'airport',\n 'Liv Tyler',\n 'bureaucracy',\n 'cute',\n 'lies',\n 'Ed Harris',\n 'Charlie Sheen',\n 'Cary Elwes',\n 'retro',\n 'view askew',\n 'IMAX Digital only',\n 'compelling',\n 'Toni Collette',\n 'sexy',\n 'Chris Evans',\n 'new york city',\n 'blindfold',\n 'short film',\n 'claustrophobic',\n 'Dwayne Johnson',\n 'Romance',\n 'manipulation',\n 'Robert Duvall',\n 'Jinni Top Pick',\n 'HAUNTED BY THE PAST',\n 'corny',\n 'espionage',\n 'hope',\n 'Jena Malone',\n 'breaking the fourth wall',\n 'sad but good',\n 'library vhs',\n 'Great Depression',\n 'AFI 100',\n 'stalker',\n 'occult',\n 'Javier Bardem',\n 'denzel washington',\n 'Oscar (Best Picture)',\n \"Frankenstein's monster\",\n 'artificial intelligence',\n \"So bad it's good\",\n 'romance',\n 'mutants',\n 'South America',\n 'scandal',\n 'unlikely friendships',\n 'reality TV',\n 'unreliable narrators',\n 'Oscar (Best Effects - Visual Effects)',\n 'latin music',\n 'campy',\n 'Roald Dahl',\n 'Beatles',\n 'well done',\n 'DVD-R',\n 'Woody Allen',\n 'James Gandolfini',\n 'deafness',\n 'murder',\n 'Clive Owen',\n 'Nicholas Cage',\n 'artistic',\n 'relationships',\n 'Rupert Grint',\n 'adventure',\n 'catastrophe',\n 'annoying characters',\n 'ambiguous ending',\n 'talky',\n 'alter ego',\n 'slackers',\n 'Hitchcock',\n 'funeral',\n 'new york',\n 'G',\n '12/11',\n 'street race',\n 'franchise',\n 'gay romance',\n 'pretentious',\n 'imdb top 250',\n 'complex characters',\n 'biblical',\n 'visually appealing',\n 'complex script',\n 'Action',\n 'psychological thriller',\n 'Dreamworks',\n 'nuclear war',\n 'corporate espionage',\n 'toplist09',\n 'giant monster',\n 'Navy',\n 'literary adaptation',\n 'religious overtones',\n 'angels',\n '3.5',\n 'Julianne Moore',\n '12/09',\n 'kids and family',\n 'cancer',\n 'exaggerated',\n 'coma',\n 'My DVDs',\n 'old',\n 'Michael Winterbottom',\n 'Eric Bana',\n 'Kurt Russell',\n 'not available from Netflix',\n 'Criterion',\n 'Kieran Culkin',\n 'bullying',\n 'child actor',\n 'paradox',\n 'Superman',\n 'Stoner Movie',\n 'totalitarianism',\n 'Carl Weathers',\n 'asylum',\n 'metafiction',\n 'AFI 100 (Laughs)',\n 'excellent acting',\n 'Mars',\n 'Oscar (Best Supporting Actress)',\n 'Luke Wilson',\n 'Akira Kurosawa',\n 'strong female lead',\n 'INNOCENCE LOST',\n 'hotel',\n 'WITCHCRAFT',\n 'Frank Capra',\n 'FATHERS AND SONS',\n 'Charlton Heston',\n 'prison escape',\n 'Daniel Radcliffe',\n 'Menahem Golan',\n 'Tom Wilkinson',\n 'childhood classics',\n 'NASA',\n 'excellent script',\n 'Roger Moore',\n 'William Shatner',\n 'Rosamund Pike',\n 'Max von Sydow',\n 'videogame',\n 'Marvel Cinematic Universe',\n 'werewolves',\n 'not as good as the first',\n 'intimate',\n 'nudity (topless - notable)',\n 'fake documentary',\n 'distorted reality',\n 'Charlize Theron',\n 'unrealistic',\n 'notable soundtrack',\n 'cultural references',\n 'odd',\n 'pedophile',\n 'blaxploitation',\n 'Cuba',\n 'Paul Newman',\n 'Great Screenplays',\n 'psychiatrist as protagonist',\n 'Matt Damon',\n 'Anime',\n 'Peter Cushing',\n 'riveting',\n 'gritty',\n 'Charlie Kaufman',\n 'bloody',\n \"erlend's DVDs\",\n 'King Arthur',\n 'Toshirô Mifune',\n 'medieval',\n 'ensemble cast',\n 'Children',\n 'John Belushi',\n 'Hugo Award',\n 'Cameron Diaz',\n 'zombie',\n 'cooking',\n 'death/fatality',\n 'Jewish',\n 'business is the antagonist',\n 'story-in-a-story',\n 'chess',\n 'East Germany',\n 'mexico',\n 'Cartoon',\n 'John Travolta',\n 'character study',\n 'unfunny',\n 'Arthurian legend',\n 'german expressionism',\n 'cross dressing',\n 'Black Comedy',\n 'spiritual journey',\n 'mutation',\n 'Keanu Reeves',\n 'freedom of expression',\n 'AFI 100 (Cheers)',\n 'Paul Verhoeven',\n 'double agents',\n 'allegory',\n 'dialogue driven',\n 'Ron Perlman',\n 'Disturbing',\n 'cult',\n 'ballet',\n 'unoriginal',\n 'bab cinema',\n 'Bette Davis',\n 'Woody Harrelson',\n 'Gulf War',\n 'cynical',\n 'R',\n 'erotic',\n 'bowling',\n 'adultery',\n 'Katherine Hepburn',\n 'Rose Byrne',\n 'pseudo-intelligent',\n 'hip hop',\n 'male nudity',\n 'Violence',\n 'Takeshi Kitano',\n 'radio',\n 'folk music',\n 'genius',\n 'imdb bottom 100',\n 'contrived',\n 'betrayal',\n 'Harold Lloyd',\n 'cartoon',\n '3D',\n 'oscar (best directing)',\n 'French Film',\n 'memory',\n 'Richard Gere',\n 'nudity (rear)',\n 'Deep',\n 'Buddhism',\n 'Shia LaBeouf',\n 'Comic Book adaption',\n 'Marlon Brando',\n 'sword and sorcery',\n 'spoof',\n 'desert',\n '16th century',\n '70s',\n 'stupid ending',\n 'Lucas',\n 'Rowan Atkinson',\n 'Tokyo',\n 'Rick Moranis',\n 'Tommy Lee Jones',\n 'spirits',\n 'Sylvester Stallone',\n 'Josh Hartnett',\n 'adolescence',\n 'unsimulated sex scenes',\n 'epic',\n 'atmospheric',\n 'Science Fiction',\n 'emotional',\n 'Anthony Hopkins',\n 'Watched',\n 'Ron Howard',\n 'Charlotte Gainsbourg',\n 'Michael Moore',\n 'Fritz Lang',\n 'youth',\n 'obvious',\n 'Steve McQueen',\n 'Underrated',\n 'BBC Films',\n 'honest',\n 'chilly',\n 'H.P. Lovecraft',\n 'free to download',\n 'David Lean',\n 'Audrey Hepburn',\n 'Olivia Wilde',\n 'Steven Spielberg',\n 'flat characters',\n 'Eric Rohmer',\n 'Ennio Morricone',\n 'Dimensionalized 2-D to 3-D',\n 'Emma Stone',\n 'Kat Dennings',\n '02/10',\n 'sequels filmed simultaneously',\n 'Kirsten Dunst',\n 'George Orwell',\n 'Visuals',\n 'organized crime',\n 'Will Ferrell',\n 'heroin',\n 'John Huston',\n 'Tennessee Williams',\n 'Steven Soderbergh',\n 'Seen 2008',\n 'tasteless',\n 'mountain climbing',\n 'treasure',\n 'exploitation',\n 'cross dressing women',\n 'science fiction',\n 'Emerson must see',\n 'Kate Winslet',\n 'imaginative',\n 'surfing',\n 'Ethan Hawke',\n 'witch',\n 'post apocalyptic',\n 'Paul Giamatti',\n 'communism',\n 'found footage',\n 'incest',\n 'made me cry',\n 'little dialogue',\n 'smart',\n 'unlikeable characters',\n 'mentor/trainer',\n 'repetitive',\n 'swearing',\n 'smuggling',\n 'acting debut',\n 'Chow Yun Fat',\n 'alternate history',\n 'M. Night Shyamalan',\n 'William Wyler',\n 'Alien Invasion',\n 'Remake',\n 'Dark',\n 'buddy movie',\n 'horror',\n 'classic comedy',\n 'Paul Walker',\n 'weak plot',\n 'Favorite',\n 'music',\n 'Cute!',\n 'author:Philip K. Dick',\n 'racism',\n 'Bela Lugosi',\n 'BREAKUPS AND DIVORCES',\n 'Patrick Stewart',\n 'short',\n 'prejudice',\n 'dumb but funny',\n 'mystic warriors',\n 'Old Tucson Studios',\n 'Ewan McGregor',\n 'Dustin Hoffman',\n 'violence',\n 'suicide',\n 'Buster Keaton',\n 'Clint Eastwood',\n 'OBSESSIVE QUESTS',\n 'Psychological horror',\n 'environment',\n 'government',\n 'watch again',\n 'Drugs',\n 'corruption',\n 'Francis Ford Copolla',\n \"Tumey's VHS\",\n 'Judd Apatow',\n 'Anthony Mann',\n 'heist',\n 'visceral',\n 'Elijah Wood',\n 'Suspense',\n 'wartime',\n 'Patricia Arquette',\n 'Unexpected Ending',\n 'series',\n 'double life',\n 'silly fun',\n 'disjointed timeline',\n 'Kenneth Branagh',\n 'macho',\n 'Star Trek',\n 'Mafia',\n \"Peter O'Toole\",\n 'fantasy world',\n 'clones',\n 'movielens top pick',\n 'fate',\n 'owned',\n 'Post apocalyptic',\n 'mythology',\n 'TV',\n 'Golden Raspberry (Worst Actress)',\n 'Captain America',\n 'katsomattomat',\n 'transformation',\n 'midlife crisis',\n 'Neo-Nazis',\n 'gangs',\n 'complicated plot',\n 'should like',\n 'Golden Raspberry (Worst Actor)',\n '03/11',\n 'scary',\n 'Based on a book',\n 'Favorites',\n 'Russell Crowe',\n 'interviews',\n 'Federico Fellini',\n 'money',\n 'german',\n 'Alaska',\n 'seen at the cinema',\n 'Rome',\n 'bollywood',\n 'lesbian',\n 'Overrated',\n 'jay and silent bob',\n 'Baz Luhrmann',\n 'tearjerking',\n 'Julie Delpy',\n 'romantic comedy',\n 'very funny',\n 'Pixar animation',\n 'blind badass',\n 'facebook rec',\n 'Marx Brothers',\n 'war',\n 'Geoffrey Rush',\n 'Hugh Dancy',\n 'Dystopia',\n 'FBI',\n 'Alcatraz',\n 'war movie',\n 'Oscar (Best Editing)',\n 'Highly quotable',\n 'domestic violence',\n 'Irish',\n 'astronauts',\n 'Kate Beckinsale',\n 'Swedish',\n 'Adolf Hitler',\n 'Basil Rathbone as Sherlock Holmes',\n 'William H. Macy',\n 'japan',\n 'David Cross',\n 'movie business',\n 'Great movie',\n \"Tumey's DVDs\",\n 'depressing',\n 'culture clash',\n 'mars',\n 'marvel',\n 'Julie Andrews',\n 'curse',\n 'poignant',\n 'banned movie',\n 'village',\n 'Surrealism',\n 'twists & turns',\n 'Ken Watanabe',\n 'Ernst Lubitsch',\n 'graphic design',\n 'Character study',\n 'single mother',\n '3',\n 'French New Wave',\n 'good plot',\n 'Oscar (Best Sound)',\n 'samurai',\n 'DC',\n 'toplist12',\n '1970s',\n 'complex',\n 'Oscar (Best Animated Feature)',\n 'VistaVision',\n '3D version',\n 'childhood',\n 'talking animals',\n 'nazis',\n 'Idris Elba',\n 'Minnie Driver',\n 'shopping',\n 'painter',\n 'Nazi Germany',\n 'boring',\n 'motorcycle',\n 'Jessica Alba',\n 'bittersweet',\n 'chocolate',\n 'lovecraftian',\n 'disturbing',\n 'Michael Apted',\n 'masterpiece',\n 'Louisiana',\n 'minimalist',\n 'very good',\n 'ethnic conflict',\n 'food/cooking',\n 'growing up',\n 'brutality',\n 'trilogy',\n 'Steve Martin',\n 'cheesy',\n 'quest',\n 'ghosts',\n 'kidnapping',\n 'Agatha Christie',\n 'japanese',\n 'Mark Wahlberg',\n 'Parallel universe',\n 'nonsensical',\n 'surprise ending',\n 'boxing drama',\n 'stunning',\n 'mathematics',\n 'skinhead',\n 'writer',\n 'Stephen Frears',\n 'Aaron Eckhart',\n 'drug abuse',\n 'mask',\n 'John Goodman',\n 'Whoopi Goldberg',\n 'author:Jane Austen',\n 'gory',\n 'crazy',\n 'reflective',\n 'poets',\n 'Egypt',\n 'angry',\n 'Anamorphic Blow-Up',\n 'poetic',\n 'add to prospects list',\n 'Billy Wilder',\n 'cycling',\n 'androids',\n 'Toho',\n 'Amanda Seyfried',\n 'Joe Pesci',\n 'Neil Patrick Harris',\n 'Stanley Tucci',\n 'Vulgar',\n 'Anna Faris',\n 'Buddy movie',\n 'raunchy',\n 'western',\n 'video game',\n 'true story',\n 'thought provoking',\n 'Hunter S. Thompson',\n 'inspiring',\n 'Own',\n 'sword and sandal',\n 'opera',\n 'imaginary friend',\n 'apocalypse',\n 'poverty',\n 'cloning',\n 'Peter Sellers',\n 'Peter Jackson',\n 'divorce',\n 'fast paced',\n 'cyborgs',\n 'private detective',\n 'Gross-out',\n 'Tatsuya Nakadai',\n 'amateur detective',\n 'indecent',\n 'The Chosen One',\n 'AFI 100 (Movie Quotes)',\n 'lawyers',\n ...}"
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tag_counts = df_tag.tag.value_counts()\n",
    "top_tags = set(tag_counts[tag_counts >= 20].index)\n",
    "top_tags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "<class 'pandas.core.frame.DataFrame'>\nInt64Index: 367091 entries, 1 to 465562\nData columns (total 3 columns):\nuserId     367091 non-null int64\nmovieId    367091 non-null int64\ntag        367091 non-null object\ndtypes: int64(2), object(1)\nmemory usage: 11.2+ MB\n"
    }
   ],
   "source": [
    "df_tag = df_tag[df_tag.tag.isin(top_tags)]\n",
    "df_tag.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "(6731, 16934, 2952)\n"
    }
   ],
   "source": [
    "user_count = len(df_tag.userId.unique())\n",
    "movie_count = len(df_tag.movieId.unique())\n",
    "tag_count = len(top_tags)\n",
    "print((user_count, movie_count,tag_count))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "uid2index = dict((uid, i) for (i, uid) in enumerate(df_tag.userId.unique()))\n",
    "moive2index = dict((mid, i) for (i, mid) in enumerate(df_tag.movieId.unique()))\n",
    "tag2index = dict(((tag, index) for (index,tag) in enumerate(top_tags)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>userId</th>\n      <th>movieId</th>\n      <th>tag</th>\n      <th>tag_index</th>\n      <th>user_index</th>\n      <th>movie_index</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>65</td>\n      <td>208</td>\n      <td>dark hero</td>\n      <td>2902</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>65</td>\n      <td>353</td>\n      <td>dark hero</td>\n      <td>2902</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>65</td>\n      <td>521</td>\n      <td>noir thriller</td>\n      <td>1315</td>\n      <td>0</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>65</td>\n      <td>592</td>\n      <td>dark hero</td>\n      <td>2902</td>\n      <td>0</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>65</td>\n      <td>668</td>\n      <td>bollywood</td>\n      <td>842</td>\n      <td>0</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>465557</th>\n      <td>138446</td>\n      <td>7164</td>\n      <td>Peter Pan</td>\n      <td>2857</td>\n      <td>6730</td>\n      <td>1986</td>\n    </tr>\n    <tr>\n      <th>465558</th>\n      <td>138446</td>\n      <td>7164</td>\n      <td>visually appealing</td>\n      <td>509</td>\n      <td>6730</td>\n      <td>1986</td>\n    </tr>\n    <tr>\n      <th>465560</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>Jason Bateman</td>\n      <td>243</td>\n      <td>6730</td>\n      <td>4222</td>\n    </tr>\n    <tr>\n      <th>465561</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>quirky</td>\n      <td>254</td>\n      <td>6730</td>\n      <td>4222</td>\n    </tr>\n    <tr>\n      <th>465562</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>sad</td>\n      <td>2592</td>\n      <td>6730</td>\n      <td>4222</td>\n    </tr>\n  </tbody>\n</table>\n<p>367091 rows × 6 columns</p>\n</div>",
      "text/plain": "        userId  movieId                 tag  tag_index  user_index  \\\n1           65      208           dark hero       2902           0   \n2           65      353           dark hero       2902           0   \n3           65      521       noir thriller       1315           0   \n4           65      592           dark hero       2902           0   \n5           65      668           bollywood        842           0   \n...        ...      ...                 ...        ...         ...   \n465557  138446     7164           Peter Pan       2857        6730   \n465558  138446     7164  visually appealing        509        6730   \n465560  138446    55999       Jason Bateman        243        6730   \n465561  138446    55999              quirky        254        6730   \n465562  138446    55999                 sad       2592        6730   \n\n        movie_index  \n1                 0  \n2                 1  \n3                 2  \n4                 3  \n5                 4  \n...             ...  \n465557         1986  \n465558         1986  \n465560         4222  \n465561         4222  \n465562         4222  \n\n[367091 rows x 6 columns]"
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tag['tag_index'] = df_tag.tag.apply(lambda t: tag2index[t])\n",
    "df_tag['user_index'] = df_tag.userId.apply(lambda i: uid2index[i])\n",
    "df_tag['movie_index'] = df_tag.movieId.apply(lambda i: moive2index[i])\n",
    "df_tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[0, 0, 0, ..., 0, 0, 0],\n       [0, 0, 0, ..., 0, 0, 0],\n       [0, 0, 0, ..., 0, 0, 0],\n       ...,\n       [0, 0, 0, ..., 0, 0, 0],\n       [0, 0, 0, ..., 0, 0, 0],\n       [0, 0, 0, ..., 0, 0, 0]], dtype=int8)"
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_tag_matrix = np.zeros((user_count, tag_count), dtype = 'i1' )\n",
    "for (uid, tag_i), group_uid_tag in df_tag.groupby(['userId', 'tag_index']):\n",
    "    # print((uid2index[uid], tag_i, len(group_uid_tag)))\n",
    "    user_tag_matrix[uid2index[uid], tag_i] =  len(group_uid_tag)\n",
    "user_tag_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[0, 0, 0, ..., 0, 1, 0],\n       [0, 0, 0, ..., 0, 1, 0],\n       [0, 0, 0, ..., 0, 0, 0],\n       ...,\n       [0, 0, 0, ..., 0, 0, 0],\n       [0, 0, 0, ..., 0, 0, 0],\n       [0, 0, 0, ..., 0, 0, 0]], dtype=int16)"
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "moive_tag_matrix = np.zeros((movie_count, tag_count), dtype = 'i2' )\n",
    "for (movieId, tag_i), group_movieId_tag in df_tag.groupby(['movieId', 'tag_index']):\n",
    "    # print((moive2index[movieId], tag_i, len(group_movieId_tag)))\n",
    "    moive_tag_matrix[moive2index[movieId], tag_i] =  len(group_movieId_tag)\n",
    "moive_tag_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[ True,  True,  True, ..., False, False, False],\n       [False, False, False, ..., False, False, False],\n       [False, False, False, ..., False, False, False],\n       ...,\n       [False, False, False, ..., False, False, False],\n       [False, False, False, ..., False, False, False],\n       [False, False, False, ..., False, False, False]])"
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_movie_matrix = np.zeros((user_count, movie_count), dtype = bool)\n",
    "for (uix, mix), grouped in df_tag.groupby(['user_index', 'movie_index']):\n",
    "    # print ((uix, mix))\n",
    "    user_movie_matrix[uix, mix] = True\n",
    "user_movie_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([23,  2, 13, ..., 48,  2,  2])"
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = user_tag_matrix.astype(bool).astype('i2').sum(axis = 0)\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[0., 0., 0., ..., 0., 0., 0.],\n       [0., 0., 0., ..., 0., 0., 0.],\n       [0., 0., 0., ..., 0., 0., 0.],\n       ...,\n       [0., 0., 0., ..., 0., 0., 0.],\n       [0., 0., 0., ..., 0., 0., 0.],\n       [0., 0., 0., ..., 0., 0., 0.]])"
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_tag_punished_matrix = user_tag_matrix  / np.log(1 + a)\n",
    "user_tag_punished_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[0.        , 0.        , 0.        , ..., 0.        , 0.24524606,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.22334706,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       ...,\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ]])"
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movie_tag_punished_matrix = moive_tag_matrix/np.log( 1 + user_movie_matrix.astype('i2').sum(axis = 0).reshape((movie_count, 1)))\n",
    "movie_tag_punished_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[2.03865201, 7.01386979, 5.8117528 , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.55238004, 0.76444404, ..., 0.        , 0.        ,\n        0.        ],\n       ...,\n       [1.69351593, 2.31075095, 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 4.29787296, 0.69172319, ..., 0.        , 0.2254706 ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ]])"
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_movie_intrest_matrix = np.dot(user_tag_punished_matrix, movie_tag_punished_matrix.T)\n",
    "user_movie_intrest_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>userId</th>\n      <th>movieId</th>\n      <th>rating</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>7399</th>\n      <td>65</td>\n      <td>24</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>7400</th>\n      <td>65</td>\n      <td>318</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>7401</th>\n      <td>65</td>\n      <td>356</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>7402</th>\n      <td>65</td>\n      <td>364</td>\n      <td>4.5</td>\n    </tr>\n    <tr>\n      <th>7403</th>\n      <td>65</td>\n      <td>443</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>19994202</th>\n      <td>138446</td>\n      <td>88140</td>\n      <td>4.5</td>\n    </tr>\n    <tr>\n      <th>19994203</th>\n      <td>138446</td>\n      <td>89745</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>19994204</th>\n      <td>138446</td>\n      <td>90866</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>19994205</th>\n      <td>138446</td>\n      <td>91500</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>19994206</th>\n      <td>138446</td>\n      <td>91542</td>\n      <td>3.5</td>\n    </tr>\n  </tbody>\n</table>\n<p>2521621 rows × 3 columns</p>\n</div>",
      "text/plain": "          userId  movieId  rating\n7399          65       24     4.0\n7400          65      318     5.0\n7401          65      356     5.0\n7402          65      364     4.5\n7403          65      443     4.0\n...          ...      ...     ...\n19994202  138446    88140     4.5\n19994203  138446    89745     5.0\n19994204  138446    90866     4.0\n19994205  138446    91500     4.0\n19994206  138446    91542     3.5\n\n[2521621 rows x 3 columns]"
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_rating = df_rating[df_rating.userId.isin(uid2index) & df_rating.movieId.isin(moive2index)]\n",
    "df_rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": ", 3398, 3399, 3400, 3401, 3402, 3403, 3404, 3405, 3406, 3407, 3408, 3409, 3410, 3411, 3412, 3413, 3414, 3415, 3416, 3417, 3418, 3419, 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428, 3429, 3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438, 3439, 3440, 3441, 3442, 3443, 3444, 3445, 3446, 3447, 3448, 3449, 3450, 3451, 3452, 3453, 3454, 3455, 3456, 3457, 3458, 3459, 3460, 3461, 3462, 3463, 3464, 3465, 3466, 3467, 3468, 3469, 3470, 3471, 3472, 3473, 3474, 3475, 3476, 3477, 3478, 3479, 3480, 3481, 3482, 3483, 3484, 3485, 3486, 3487, 3488, 3489, 3490, 3491, 3492, 3493, 3494, 3495, 3496, 3497, 3498, 3499, 3500, 3501, 3502, 3503, 3504, 3505, 3506, 3507, 3508, 3509, 3510, 3511, 3512, 3513, 3514, 3515, 3516, 3517, 3518, 3519, 3520, 3521, 3522, 3523, 3524, 3525, 3526, 3527, 3528, 3529, 3530, 3531, 3532, 3533, 3534, 3535, 3536, 3537, 3538, 3539, 3540, 3541, 3542, 3543, 3544, 3545, 3546, 3547, 3548, 3549, 3550, 3551, 3552, 3553, 3554, 3555, 3556, 3557, 3558, 3559, 3560, 3561, 3562, 3563, 3564, 3565, 3566, 3567, 3568, 3569, 3570, 3571, 3572, 3573, 3574, 3575, 3576, 3577, 3578, 3579, 3580, 3581, 3582, 3583, 3584, 3585, 3586, 3587, 3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 3606, 3607, 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624, 3625, 3626, 3627, 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641, 3642, 3643, 3644, 3645, 3646, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 3660, 3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, 3676, 3677, 3678, 3679, 3680, 3681, 3682, 3683, 3684, 3685, 3686, 3687, 3688, 3689, 3690, 3691, 3692, 3693, 3694, 3695, 3696, 3697, 3698, 3699, 3700, 3701, 3702, 3703, 3704, 3705, 3706, 3707, 3708, 3709, 3710, 3711, 3712, 3713, 3714, 3715, 3716, 3717, 3718, 3719, 3720, 3721, 3722, 3723, 3724, 3725, 3726, 3727, 3728, 3729, 3730, 3731, 3732, 3733, 3734, 3735, 3736, 3737, 3738, 3739, 3740, 3741, 3742, 3743, 3744, 3745, 3746, 3747, 3748, 3749, 3750, 3751, 3752, 3753, 3754, 3755, 3756, 3757, 3758, 3759, 3760, 3761, 3762, 3763, 3764, 3765, 3766, 3767, 3768, 3769, 3770, 3771, 3772, 3773, 3774, 3775, 3776, 3777, 3778, 3779, 3780, 3781, 3782, 3783, 3784, 3785, 3786, 3787, 3788, 3789, 3790, 3791, 3792, 3793, 3794, 3795, 3796, 3797, 3798, 3799, 3800, 3801, 3802, 3803, 3804, 3805, 3806, 3807, 3808, 3809, 3810, 3811, 3812, 3813, 3814, 3815, 3816, 3817, 3818, 3819, 3820, 3821, 3822, 3823, 3824, 3825, 3826, 3827, 3828, 3829, 3830, 3831, 3832, 3833, 3834, 3835, 3836, 3837, 3838, 3839, 3840, 3841, 3842, 3843, 3844, 3845, 3846, 3847, 3848, 3849, 3850, 3851, 3852, 3853, 3854, 3855, 3856, 3857, 3858, 3859, 3860, 3861, 3862, 3863, 3864, 3865, 3866, 3867, 3868, 3869, 3870, 3871, 3872, 3873, 3874, 3875, 3876, 3877, 3878, 3879, 3880, 3881, 3882, 3883, 3884, 3885, 3886, 3887, 3888, 3889, 3890, 3891, 3892, 3893, 3894, 3895, 3896, 3897, 3898, 3899, 3900, 3901, 3902, 3903, 3904, 3905, 3906, 3907, 3908, 3909, 3910, 3911, 3912, 3913, 3914, 3915, 3916, 3917, 3918, 3919, 3920, 3921, 3922, 3923, 3924, 3925, 3926, 3927, 3928, 3929, 3930, 3931, 3932, 3933, 3934, 3935, 3936, 3937, 3938, 3939, 3940, 3941, 3942, 3943, 3944, 3945, 3946, 3947, 3948, 3949, 3950, 3951, 3952, 3953, 3954, 3955, 3956, 3957, 3958, 3959, 3960, 3961, 3962, 3963, 3964, 3965, 3966, 3967, 3968, 3969, 3970, 3971, 3972, 3973, 3974, 3975, 3976, 3977, 3978, 3979, 3980, 3981, 3982, 3983, 3984, 3985, 3986, 3987, 3988, 3989, 3990, 3991, 3992, 3993, 3994, 3995, 3996, 3997, 3998, 3999, 4000, 4001, 4002, 4003, 4004, 4005, 4006, 4007, 4008, 4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, 4026, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036, 4037, 4038, 4039, 4040, 4041, 4042, 4043, 4044, 4045, 4046, 4047, 4048, 4049, 4050, 4051, 4052, 4053, 4054, 4055, 4056, 4057, 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, 4066, 4067, 4068, 4069, 4070, 4071, 4072, 4073, 4074, 4075, 4076, 4077, 4078, 4079, 4080, 4081, 4082, 4083, 4084, 4085, 4086, 4087, 4088, 4089, 4090, 4091, 4092, 4093, 4094, 4095, 4096, 4097, 4098, 4099, 4100, 4101, 4102, 4103, 4104, 4105, 4106, 4107, 4108, 4109, 4110, 4111, 4112, 4113, 4114, 4115, 4116, 4117, 4118, 4119, 4120, 4121, 4122, 4123, 4124, 4125, 4126, 4127, 4128, 4129, 4130, 4131, 4132, 4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, 4141, 4142, 4143, 4144, 4145, 4146, 4147, 4148, 4149, 4150, 4151, 4152, 4153, 4154, 4155, 4156, 4157, 4158, 4159, 4160, 4161, 4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 4170, 4171, 4172, 4173, 4174, 4175, 4176, 4177, 4178, 4179, 4180, 4181, 4182, 4183, 4184, 4185, 4186, 4187, 4188, 4189, 4190, 4191, 4192, 4193, 4194, 4195, 4196, 4197, 4198, 4199, 4200, 4201, 4202, 4203, 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4212, 4213, 4214, 4215, 4216, 4217, 4218, 4219, 4220, 4221, 4222, 4223, 4224, 4225, 4226, 4227, 4228, 4229, 4230, 4231, 4232, 4233, 4234, 4235, 4236, 4237, 4238, 4239, 4240, 4241, 4242, 4243, 4244, 4245, 4246, 4247, 4248, 4249, 4250, 4251, 4252, 4253, 4254, 4255, 4256, 4257, 4258, 4259, 4260, 4261, 4262, 4263, 4264, 4265, 4266, 4267, 4268, 4269, 4270, 4271, 4272, 4273, 4274, 4275, 4276, 4277, 4278, 4279, 4280, 4281, 4282, 4283, 4284, 4285, 4286, 4287, 4288, 4289, 4290, 4291, 4292, 4293, 4294, 4295, 4296, 4297, 4298, 4299, 4300, 4301, 4302, 4303, 4304, 4305, 4306, 4307, 4308, 4309, 4310, 4311, 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, 4320, 4321, 4322, 4323, 4324, 4325, 4326, 4327, 4328, 4329, 4330, 4331, 4332, 4333, 4334, 4335, 4336, 4337, 4338, 4339, 4340, 4341, 4342, 4343, 4344, 4345, 4346, 4347, 4348, 4349, 4350, 4351, 4352, 4353, 4354, 4355, 4356, 4357, 4358, 4359, 4360, 4361, 4362, 4363, 4364, 4365, 4366, 4367, 4368, 4369, 4370, 4371, 4372, 4373, 4374, 4375, 4376, 4377, 4378, 4379, 4380, 4381, 4382, 4383, 4384, 4385, 4386, 4387, 4388, 4389, 4390, 4391, 4392, 4393, 4394, 4395, 4396, 4397, 4398, 4399, 4400, 4401, 4402, 4403, 4404, 4405, 4406, 4407, 4408, 4409, 4410, 4411, 4412, 4413, 4414, 4415, 4416, 4417, 4418, 4419, 4420, 4421, 4422, 4423, 4424, 4425, 4426, 4427, 4428, 4429, 4430, 4431, 4432, 4433, 4434, 4435, 4436, 4437, 4438, 4439, 4440, 4441, 4442, 4443, 4444, 4445, 4446, 4447, 4448, 4449, 4450, 4451, 4452, 4453, 4454, 4455, 4456, 4457, 4458, 4459, 4460, 4461, 4462, 4463, 4464, 4465, 4466, 4467, 4468, 4469, 4470, 4471, 4472, 4473, 4474, 4475, 4476, 4477, 4478, 4479, 4480, 4481, 4482, 4483, 4484, 4485, 4486, 4487, 4488, 4489, 4490, 4491, 4492, 4493, 4494, 4495, 4496, 4497, 4498, 4499, 4500, 4501, 4502, 4503, 4504, 4505, 4506, 4507, 4508, 4509, 4510, 4511, 4512, 4513, 4514, 4515, 4516, 4517, 4518, 4519, 4520, 4521, 4522, 4523, 4524, 4525, 4526, 4527, 4528, 4529, 4530, 4531, 4532, 4533, 4534, 4535, 4536, 4537, 4538, 4539, 4540, 4541, 4542, 4543, 4544, 4545, 4546, 4547, 4548, 4549, 4550, 4551, 4552, 4553, 4554, 4555, 4556, 4557, 4558, 4559, 4560, 4561, 4562, 4563, 4564, 4565, 4566, 4567, 4568, 4569, 4570, 4571, 4572, 4573, 4574, 4575, 4576, 4577, 4578, 4579, 4580, 4581, 4582, 4583, 4584, 4585, 4586, 4587, 4588, 4589, 4590, 4591, 4592, 4593, 4594, 4595, 4596, 4597, 4598, 4599, 4600, 4601, 4602, 4603, 4604, 4605, 4606, 4607, 4608, 4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616, 4617, 4618, 4619, 4620, 4621, 4622, 4623, 4624, 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, 4641, 4642, 4643, 4644, 4645, 4646, 4647, 4648, 4649, 4650, 4651, 4652, 4653, 4654, 4655, 4656, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, 4665, 4666, 4667, 4668, 4669, 4670, 4671, 4672, 4673, 4674, 4675, 4676, 4677, 4678, 4679, 4680, 4681, 4682, 4683, 4684, 4685, 4686, 4687, 4688, 4689, 4690, 4691, 4692, 4693, 4694, 4695, 4696, 4697, 4698, 4699, 4700, 4701, 4702, 4703, 4704, 4705, 4706, 4707, 4708, 4709, 4710, 4711, 4712, 4713, 4714, 4715, 4716, 4717, 4718, 4719, 4720, 4721, 4722, 4723, 4724, 4725, 4726, 4727, 4728, 4729, 4730, 4731, 4732, 4733, 4734, 4735, 4736, 4737, 4738, 4739, 4740, 4741, 4742, 4743, 4744, 4745, 4746, 4747, 4748, 4749, 4750, 4751, 4752, 4753, 4754, 4755, 4756, 4757, 4758, 4759, 4760, 4761, 4762, 4763, 4764, 4765, 4766, 4767, 4768, 4769, 4770, 4771, 4772, 4773, 4774, 4775, 4776, 4777, 4778, 4779, 4780, 4781, 4782, 4783, 4784, 4785, 4786, 4787, 4788, 4789, 4790, 4791, 4792, 4793, 4794, 4795, 4796, 4797, 4798, 4799, 4800, 4801, 4802, 4803, 4804, 4805, 4806, 4807, 4808, 4809, 4810, 4811, 4812, 4813, 4814, 4815, 4816, 4817, 4818, 4819, 4820, 4821, 4822, 4823, 4824, 4825, 4826, 4827, 4828, 4829, 4830, 4831, 4832, 4833, 4834, 4835, 4836, 4837, 4838, 4839, 4840, 4841, 4842, 4843, 4844, 4845, 4846, 4847, 4848, 4849, 4850, 4851, 4852, 4853, 4854, 4855, 4856, 4857, 4858, 4859, 4860, 4861, 4862, 4863, 4864, 4865, 4866, 4867, 4868, 4869, 4870, 4871, 4872, 4873, 4874, 4875, 4876, 4877, 4878, 4879, 4880, 4881, 4882, 4883, 4884, 4885, 4886, 4887, 4888, 4889, 4890, 4891, 4892, 4893, 4894, 4895, 4896, 4897, 4898, 4899, 4900, 4901, 4902, 4903, 4904, 4905, 4906, 4907, 4908, 4909, 4910, 4911, 4912, 4913, 4914, 4915, 4916, 4917, 4918, 4919, 4920, 4921, 4922, 4923, 4924, 4925, 4926, 4927, 4928, 4929, 4930, 4931, 4932, 4933, 4934, 4935, 4936, 4937, 4938, 4939, 4940, 4941, 4942, 4943, 4944, 4945, 4946, 4947, 4948, 4949, 4950, 4951, 4952, 4953, 4954, 4955, 4956, 4957, 4958, 4959, 4960, 4961, 4962, 4963, 4964, 4965, 4966, 4967, 4968, 4969, 4970, 4971, 4972, 4973, 4974, 4975, 4976, 4977, 4978, 4979, 4980, 4981, 4982, 4983, 4984, 4985, 4986, 4987, 4988, 4989, 4990, 4991, 4992, 4993, 4994, 4995, 4996, 4997, 4998, 4999, 5000, 5001, 5002, 5003, 5004, 5005, 5006, 5007, 5008, 5009, 5010, 5011, 5012, 5013, 5014, 5015, 5016, 5017, 5018, 5019, 5020, 5021, 5022, 5023, 5024, 5025, 5026, 5027, 5028, 5029, 5030, 5031, 5032, 5033, 5034, 5035, 5036, 5037, 5038, 5039, 5040, 5041, 5042, 5043, 5044, 5045, 5046, 5047, 5048, 5049, 5050, 5051, 5052, 5053, 5054, 5055, 5056, 5057, 5058, 5059, 5060, 5061, 5062, 5063, 5064, 5065, 5066, 5067, 5068, 5069, 5070, 5071, 5072, 5073, 5074, 5075, 5076, 5077, 5078, 5079, 5080, 5081, 5082, 5083, 5084, 5085, 5086, 5087, 5088, 5089, 5090, 5091, 5092, 5093, 5094, 5095, 5096, 5097, 5098, 5099, 5100, 5101, 5102, 5103, 5104, 5105, 5106, 5107, 5108, 5109, 5110, 5111, 5112, 5113, 5114, 5115, 5116, 5117, 5118, 5119, 5120, 5121, 5122, 5123, 5124, 5125, 5126, 5127, 5128, 5129, 5130, 5131, 5132, 5133, 5134, 5135, 5136, 5137, 5138, 5139, 5140, 5141, 5142, 5143, 5144, 5145, 5146, 5147, 5148, 5149, 5150, 5151, 5152, 5153, 5154, 5155, 5156, 5157, 5158, 5159, 5160, 5161, 5162, 5163, 5164, 5165, 5166, 5167, 5168, 5169, 5170, 5171, 5172, 5173, 5174, 5175, 5176, 5177, 5178, 5179, 5180, 5181, 5182, 5183, 5184, 5185, 5186, 5187, 5188, 5189, 5190, 5191, 5192, 5193, 5194, 5195, 5196, 5197, 5198, 5199, 5200, 5201, 5202, 5203, 5204, 5205, 5206, 5207, 5208, 5209, 5210, 5211, 5212, 5213, 5214, 5215, 5216, 5217, 5218, 5219, 5220, 5221, 5222, 5223, 5224, 5225, 5226, 5227, 5228, 5229, 5230, 5231, 5232, 5233, 5234, 5235, 5236, 5237, 5238, 5239, 5240, 5241, 5242, 5243, 5244, 5245, 5246, 5247, 5248, 5249, 5250, 5251, 5252, 5253, 5254, 5255, 5256, 5257, 5258, 5259, 5260, 5261, 5262, 5263, 5264, 5265, 5266, 5267, 5268, 5269, 5270, 5271, 5272, 5273, 5274, 5275, 5276, 5277, 5278, 5279, 5280, 5281, 5282, 5283, 5284, 5285, 5286, 5287, 5288, 5289, 5290, 5291, 5292, 5293, 5294, 5295, 5296, 5297, 5298, 5299, 5300, 5301, 5302, 5303, 5304, 5305, 5306, 5307, 5308, 5309, 5310, 5311, 5312, 5313, 5314, 5315, 5316, 5317, 5318, 5319, 5320, 5321, 5322, 5323, 5324, 5325, 5326, 5327, 5328, 5329, 5330, 5331, 5332, 5333, 5334, 5335, 5336, 5337, 5338, 5339, 5340, 5341, 5342, 5343, 5344, 5345, 5346, 5347, 5348, 5349, 5350, 5351, 5352, 5353, 5354, 5355, 5356, 5357, 5358, 5359, 5360, 5361, 5362, 5363, 5364, 5365, 5366, 5367, 5368, 5369, 5370, 5371, 5372, 5373, 5374, 5375, 5376, 5377, 5378, 5379, 5380, 5381, 5382, 5383, 5384, 5385, 5386, 5387, 5388, 5389, 5390, 5391, 5392, 5393, 5394, 5395, 5396, 5397, 5398, 5399, 5400, 5401, 5402, 5403, 5404, 5405, 5406, 5407, 5408, 5409, 5410, 5411, 5412, 5413, 5414, 5415, 5416, 5417, 5418, 5419, 5420, 5421, 5422, 5423, 5424, 5425, 5426, 5427, 5428, 5429, 5430, 5431, 5432, 5433, 5434, 5435, 5436, 5437, 5438, 5439, 5440, 5441, 5442, 5443, 5444, 5445, 5446, 5447, 5448, 5449, 5450, 5451, 5452, 5453, 5454, 5455, 5456, 5457, 5458, 5459, 5460, 5461, 5462, 5463, 5464, 5465, 5466, 5467, 5468, 5469, 5470, 5471, 5472, 5473, 5474, 5475, 5476, 5477, 5478, 5479, 5480, 5481, 5482, 5483, 5484, 5485, 5486, 5487, 5488, 5489, 5490, 5491, 5492, 5493, 5494, 5495, 5496, 5497, 5498, 5499, 5500, 5501, 5502, 5503, 5504, 5505, 5506, 5507, 5508, 5509, 5510, 5511, 5512, 5513, 5514, 5515, 5516, 5517, 5518, 5519, 5520, 5521, 5522, 5523, 5524, 5525, 5526, 5527, 5528, 5529, 5530, 5531, 5532, 5533, 5534, 5535, 5536, 5537, 5538, 5539, 5540, 5541, 5542, 5543, 5544, 5545, 5546, 5547, 5548, 5549, 5550, 5551, 5552, 5553, 5554, 5555, 5556, 5557, 5558, 5559, 5560, 5561, 5562, 5563, 5564, 5565, 5566, 5567, 5568, 5569, 5570, 5571, 5572, 5573, 5574, 5575, 5576, 5577, 5578, 5579, 5580, 5581, 5582, 5583, 5584, 5585, 5586, 5587, 5588, 5589, 5590, 5591, 5592, 5593, 5594, 5595, 5596, 5597, 5598, 5599, 5600, 5601, 5602, 5603, 5604, 5605, 5606, 5607, 5608, 5609, 5610, 5611, 5612, 5613, 5614, 5615, 5616, 5617, 5618, 5619, 5620, 5621, 5622, 5623, 5624, 5625, 5626, 5627, 5628, 5629, 5630, 5631, 5632, 5633, 5634, 5635, 5636, 5637, 5638, 5639, 5640, 5641, 5642, 5643, 5644, 5645, 5646, 5647, 5648, 5649, 5650, 5651, 5652, 5653, 5654, 5655, 5656, 5657, 5658, 5659, 5660, 5661, 5662, 5663, 5664, 5665, 5666, 5667, 5668, 5669, 5670, 5671, 5672, 5673, 5674, 5675, 5676, 5677, 5678, 5679, 5680, 5681, 5682, 5683, 5684, 5685, 5686, 5687, 5688, 5689, 5690, 5691, 5692, 5693, 5694, 5695, 5696, 5697, 5698, 5699, 5700, 5701, 5702, 5703, 5704, 5705, 5706, 5707, 5708, 5709, 5710, 5711, 5712, 5713, 5714, 5715, 5716, 5717, 5718, 5719, 5720, 5721, 5722, 5723, 5724, 5725, 5726, 5727, 5728, 5729, 5730, 5731, 5732, 5733, 5734, 5735, 5736, 5737, 5738, 5739, 5740, 5741, 5742, 5743, 5744, 5745, 5746, 5747, 5748, 5749, 5750, 5751, 5752, 5753, 5754, 5755, 5756, 5757, 5758, 5759, 5760, 5761, 5762, 5763, 5764, 5765, 5766, 5767, 5768, 5769, 5770, 5771, 5772, 5773, 5774, 5775, 5776, 5777, 5778, 5779, 5780, 5781, 5782, 5783, 5784, 5785, 5786, 5787, 5788, 5789, 5790, 5791, 5792, 5793, 5794, 5795, 5796, 5797, 5798, 5799, 5800, 5801, 5802, 5803, 5804, 5805, 5806, 5807, 5808, 5809, 5810, 5811, 5812, 5813, 5814, 5815, 5816, 5817, 5818, 5819, 5820, 5821, 5822, 5823, 5824, 5825, 5826, 5827, 5828, 5829, 5830, 5831, 5832, 5833, 5834, 5835, 5836, 5837, 5838, 5839, 5840, 5841, 5842, 5843, 5844, 5845, 5846, 5847, 5848, 5849, 5850, 5851, 5852, 5853, 5854, 5855, 5856, 5857, 5858, 5859, 5860, 5861, 5862, 5863, 5864, 5865, 5866, 5867, 5868, 5869, 5870, 5871, 5872, 5873, 5874, 5875, 5876, 5877, 5878, 5879, 5880, 5881, 5882, 5883, 5884, 5885, 5886, 5887, 5888, 5889, 5890, 5891, 5892, 5893, 5894, 5895, 5896, 5897, 5898, 5899, 5900, 5901, 5902, 5903, 5904, 5905, 5906, 5907, 5908, 5909, 5910, 5911, 5912, 5913, 5914, 5915, 5916, 5917, 5918, 5919, 5920, 5921, 5922, 5923, 5924, 5925, 5926, 5927, 5928, 5929, 5930, 5931, 5932, 5933, 5934, 5935, 5936, 5937, 5938, 5939, 5940, 5941, 5942, 5943, 5944, 5945, 5946, 5947, 5948, 5949, 5950, 5951, 5952, 5953, 5954, 5955, 5956, 5957, 5958, 5959, 5960, 5961, 5962, 5963, 5964, 5965, 5966, 5967, 5968, 5969, 5970, 5971, 5972, 5973, 5974, 5975, 5976, 5977, 5978, 5979, 5980, 5981, 5982, 5983, 5984, 5985, 5986, 5987, 5988, 5989, 5990, 5991, 5992, 5993, 5994, 5995, 5996, 5997, 5998, 5999, 6000, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, 6011, 6012, 6013, 6014, 6015, 6016, 6017, 6018, 6019, 6020, 6021, 6022, 6023, 6024, 6025, 6026, 6027, 6028, 6029, 6030, 6031, 6032, 6033, 6034, 6035, 6036, 6037, 6038, 6039, 6040, 6041, 6042, 6043, 6044, 6045, 6046, 6047, 6048, 6049, 6050, 6051, 6052, 6053, 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, 6062, 6063, 6064, 6065, 6066, 6067, 6068, 6069, 6070, 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, 6079, 6080, 6081, 6082, 6083, 6084, 6085, 6086, 6087, 6088, 6089, 6090, 6091, 6092, 6093, 6094, 6095, 6096, 6097, 6098, 6099, 6100, 6101, 6102, 6103, 6104, 6105, 6106, 6107, 6108, 6109, 6110, 6111, 6112, 6113, 6114, 6115, 6116, 6117, 6118, 6119, 6120, 6121, 6122, 6123, 6124, 6125, 6126, 6127, 6128, 6129, 6130, 6131, 6132, 6133, 6134, 6135, 6136, 6137, 6138, 6139, 6140, 6141, 6142, 6143, 6144, 6145, 6146, 6147, 6148, 6149, 6150, 6151, 6152, 6153, 6154, 6155, 6156, 6157, 6158, 6159, 6160, 6161, 6162, 6163, 6164, 6165, 6166, 6167, 6168, 6169, 6170, 6171, 6172, 6173, 6174, 6175, 6176, 6177, 6178, 6179, 6180, 6181, 6182, 6183, 6184, 6185, 6186, 6187, 6188, 6189, 6190, 6191, 6192, 6193, 6194, 6195, 6196, 6197, 6198, 6199, 6200, 6201, 6202, 6203, 6204, 6205, 6206, 6207, 6208, 6209, 6210, 6211, 6212, 6213, 6214, 6215, 6216, 6217, 6218, 6219, 6220, 6221, 6222, 6223, 6224, 6225, 6226, 6227, 6228, 6229, 6230, 6231, 6232, 6233, 6234, 6235, 6236, 6237, 6238, 6239, 6240, 6241, 6242, 6243, 6244, 6245, 6246, 6247, 6248, 6249, 6250, 6251, 6252, 6253, 6254, 6255, 6256, 6257, 6258, 6259, 6260, 6261, 6262, 6263, 6264, 6265, 6266, 6267, 6268, 6269, 6270, 6271, 6272, 6273, 6274, 6275, 6276, 6277, 6278, 6279, 6280, 6281, 6282, 6283, 6284, 6285, 6286, 6287, 6288, 6289, 6290, 6291, 6292, 6293, 6294, 6295, 6296, 6297, 6298, 6299, 6300, 6301, 6302, 6303, 6304, 6305, 6306, 6307, 6308, 6309, 6310, 6311, 6312, 6313, 6314, 6315, 6316, 6317, 6318, 6319, 6320, 6321, 6322, 6323, 6324, 6325, 6326, 6327, 6328, 6329, 6330, 6331, 6332, 6333, 6334, 6335, 6336, 6337, 6338, 6339, 6340, 6341, 6342, 6343, 6344, 6345, 6346, 6347, 6348, 6349, 6350, 6351, 6352, 6353, 6354, 6355, 6356, 6357, 6358, 6359, 6360, 6361, 6362, 6363, 6364, 6365, 6366, 6367, 6368, 6369, 6370, 6371, 6372, 6373, 6374, 6375, 6376, 6377, 6378, 6379, 6380, 6381, 6382, 6383, 6384, 6385, 6386, 6387, 6388, 6389, 6390, 6391, 6392, 6393, 6394, 6395, 6396, 6397, 6398, 6399, 6400, 6401, 6402, 6403, 6404, 6405, 6406, 6407, 6408, 6409, 6410, 6411, 6412, 6413, 6414, 6415, 6416, 6417, 6418, 6419, 6420, 6421, 6422, 6423, 6424, 6425, 6426, 6427, 6428, 6429, 6430, 6431, 6432, 6433, 6434, 6435, 6436, 6437, 6438, 6439, 6440, 6441, 6442, 6443, 6444, 6445, 6446, 6447, 6448, 6449, 6450, 6451, 6452, 6453, 6454, 6455, 6456, 6457, 6458, 6459, 6460, 6461, 6462, 6463, 6464, 6465, 6466, 6467, 6468, 6469, 6470, 6471, 6472, 6473, 6474, 6475, 6476, 6477, 6478, 6479, 6480, 6481, 6482, 6483, 6484, 6485, 6486, 6487, 6488, 6489, 6490, 6491, 6492, 6493, 6494, 6495, 6496, 6497, 6498, 6499, 6500, 6501, 6502, 6503, 6504, 6505, 6506, 6507, 6508, 6509, 6510, 6511, 6512, 6513, 6514, 6515, 6516, 6517, 6518, 6519, 6520, 6521, 6522, 6523, 6524, 6525, 6526, 6527, 6528, 6529, 6530, 6531, 6532, 6533, 6534, 6535, 6536, 6537, 6538, 6539, 6540, 6541, 6542, 6543, 6544, 6545, 6546, 6547, 6548, 6549, 6550, 6551, 6552, 6553, 6554, 6555, 6556, 6557, 6558, 6559, 6560, 6561, 6562, 6563, 6564, 6565, 6566, 6567, 6568, 6569, 6570, 6571, 6572, 6573, 6574, 6575, 6576, 6577, 6578, 6579, 6580, 6581, 6582, 6583, 6584, 6585, 6586, 6587, 6588, 6589, 6590, 6591, 6592, 6593, 6594, 6595, 6596, 6597, 6598, 6599, 6600, 6601, 6602, 6603, 6604, 6605, 6606, 6607, 6608, 6609, 6610, 6611, 6612, 6613, 6614, 6615, 6616, 6617, 6618, 6619, 6620, 6621, 6622, 6623, 6624, 6625, 6626, 6627, 6628, 6629, 6630, 6631, 6632, 6633, 6634, 6635, 6636, 6637, 6638, 6639, 6640, 6641, 6642, 6643, 6644, 6645, 6646, 6647, 6648, 6649, 6650, 6651, 6652, 6653, 6654, 6655, 6656, 6657, 6658, 6659, 6660, 6661, 6662, 6663, 6664, 6665, 6666, 6667, 6668, 6669, 6670, 6671, 6672, 6673, 6674, 6675, 6676, 6677, 6678, 6679, 6680, 6681, 6682, 6683, 6684, 6685, 6686, 6687, 6688, 6689, 6690, 6691, 6692, 6693, 6694, 6695, 6696, 6697, 6698, 6699, 6700, 6701, 6702, 6703, 6704, 6705, 6706, 6707, 6708, 6709, 6710, 6711, 6712, 6713, 6714, 6715, 6716, 6717, 6718, 6719, 6720, 6721, 6722, 6723, 6724, 6725, 6726, 6727, 6728, 6729, 6730])"
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_fav = dict (\n",
    "    (uid2index[uid], set(grouped.movieId.apply(lambda m: moive2index[m])))\n",
    "    for uid, grouped in df_rating[df_rating.rating>= 3].groupby('userId')\n",
    ")\n",
    "user_fav.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "user_recommend = dict(\n",
    "    (\n",
    "        uix, \n",
    "        set(np.where(user_movie_intrest_matrix[uix] >= 1)[0])\n",
    "    )\n",
    "    for uix in range(user_count)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "('precision', 0.15547000747075737)\n('recall', 0.13647440342022432)\n"
    }
   ],
   "source": [
    "ac_quantity = 0\n",
    "recommend_quantity = 0 \n",
    "user_fav_quantity = 0\n",
    "for uix in range(user_count):\n",
    "    if uix not in user_fav:\n",
    "        # recommend_quantity += len(user_recommend[uix])\n",
    "        continue\n",
    "    ac_quantity += len(user_recommend[uix] &\n",
    "     user_fav[uix])\n",
    "    recommend_quantity += len(user_recommend[uix])\n",
    "    user_fav_quantity += len(user_fav[uix])\n",
    "\n",
    "print(('precision', ac_quantity/ recommend_quantity))\n",
    "print(('recall', ac_quantity/(user_fav_quantity + 1)))"
   ]
  }
 ]
}